# Point the session at the project root. Inside a notebook chunk this change
# only lasts until the chunk finishes running; the knitr `root.dir` option in
# the setup chunk is the way to change it for all chunks.
setwd("~/Desktop/af-werx")
The working directory was changed to /Users/datasociety/Desktop/af-werx inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
# Show the working directory currently in effect.
getwd()
[1] \/Users/datasociety/Desktop/af-werx/data\
# Project root and the `data` sub-directory beneath it.
main_dir <- "~/Desktop/af-werx"
data_dir <- file.path(main_dir, "data")
data_dir
[1] \~/Desktop/af-werx/data\
# Path of the `plots` sub-directory under the project root.
plot_dir <- file.path(main_dir, "plots")
plot_dir
[1] \~/Desktop/af-werx/plots\
# Switch into the data directory and confirm where the session now is.
setwd(data_dir)
getwd()
[1] \/Users/datasociety/Desktop/af-werx/data\
# NOTE(review): the notebook extraction fused this chunk onto one line and
# stripped the quotes; assumed only the load() call was commented out - confirm.
#load("_tables.RData")
flights <- nycflights13::flights
# Read the chemical manufacturing process data set. The file name is relative,
# so it is resolved against the current working directory.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
CMP <- read.csv(
  "ChemicalManufacturingProcess.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
View(CMP)

# Positions of the columns to keep: the first four plus columns 14 to 16.
column_ids <- c(1:4, 14:16)
column_ids
[1] 1 2 3 4 14 15 16
# Keep only the selected columns and inspect the structure of the result.
CMP_subset <- CMP[, column_ids]
str(CMP_subset)
'data.frame': 176 obs. of 7 variables:
$ Yield : num 38 42.4 42 41.4 42.5 ...
$ BiologicalMaterial01 : num 6.25 8.01 8.01 8.01 7.47 6.12 7.48 6.94 6.94 6.94 ...
$ BiologicalMaterial02 : num 49.6 61 61 61 63.3 ...
$ BiologicalMaterial03 : num 57 67.5 67.5 67.5 72.2 ...
$ ManufacturingProcess01: num NA 0 0 0 10.7 12 11.5 12 12 12 ...
$ ManufacturingProcess02: num NA 0 0 0 0 0 0 0 0 0 ...
$ ManufacturingProcess03: num NA NA NA NA NA NA 1.56 1.55 1.56 1.55 ...
# Five-number summary (plus mean) of the Yield column.
summary(CMP_subset[["Yield"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
35.25 38.75 39.97 40.18 41.48 46.34
# Plain boxplot of Yield, then the same plot with a fill color and a title.
boxplot(CMP_subset[["Yield"]])
boxplot(CMP_subset[["Yield"]], main = "Yield Summary", col = "orange")

# First six entries of R's built-in color names.
head(colors(), 6)
[1] "white" "aliceblue" "antiquewhite" "antiquewhite1" "antiquewhite2" "antiquewhite3"
# Open the help page for the named colors, then run the bundled colors demo.
help("colors")
demo("colors")
demo(colors)
---- ~~~~~~
> ### ----------- Show (almost) all named colors ---------------------
>
> ## 1) with traditional 'graphics' package:
> showCols1 <- function(bg = "gray", cex = 0.75, srt = 30) {
+ m <- ceiling(sqrt(n <- length(cl <- colors())))
+ length(cl) <- m*m; cm <- matrix(cl, m)
+ ##
+ require("graphics")
+ op <- par(mar=rep(0,4), ann=FALSE, bg = bg); on.exit(par(op))
+ plot(1:m,1:m, type="n", axes=FALSE)
+ text(col(cm), rev(row(cm)), cm, col = cl, cex=cex, srt=srt)
+ }
> showCols1()
> ## 2) with 'grid' package:
> showCols2 <- function(bg = "grey", cex = 0.75, rot = 30) {
+ m <- ceiling(sqrt(n <- length(cl <- colors())))
+ length(cl) <- m*m; cm <- matrix(cl, m)
+ ##
+ require("grid")
+ grid.newpage(); vp <- viewport(w = .92, h = .92)
+ grid.rect(gp=gpar(fill=bg))
+ grid.text(cm, x = col(cm)/m, y = rev(row(cm))/m, rot = rot,
+ vp=vp, gp=gpar(cex = cex, col = cm))
+ }
> showCols2()
> showCols2(bg = "gray33")
> ###
>
> ##' @title Comparing Colors
> ##' @param col
> ##' @param nrow
> ##' @param ncol
> ##' @param txt.col
> ##' @return the grid layout, invisibly
> ##' @author Marius Hofert, originally
> plotCol <- function(col, nrow=1, ncol=ceiling(length(col) / nrow),
+ txt.col="black") {
+ stopifnot(nrow >= 1, ncol >= 1)
+ if(length(col) > nrow*ncol)
+ warning("some colors will not be shown")
+ require(grid)
+ grid.newpage()
+ gl <- grid.layout(nrow, ncol)
+ pushViewport(viewport(layout=gl))
+ ic <- 1
+ for(i in 1:nrow) {
+ for(j in 1:ncol) {
+ pushViewport(viewport(layout.pos.row=i, layout.pos.col=j))
+ grid.rect(gp= gpar(fill=col[ic]))
+ grid.text(col[ic], gp=gpar(col=txt.col))
+ upViewport()
+ ic <- ic+1
+ }
+ }
+ upViewport()
+ invisible(gl)
+ }
> ## A Chocolate Bar of colors:
> plotCol(c("#CC8C3C", paste0("chocolate", 2:4),
+ paste0("darkorange", c("",1:2)), paste0("darkgoldenrod", 1:2),
+ "orange", "orange1", "sandybrown", "tan1", "tan2"),
+ nrow=2)
> ##' Find close R colors() to a given color {original by Marius Hofert)
> ##' using Euclidean norm in (HSV / RGB / ...) color space
> nearRcolor <- function(rgb, cSpace = c("hsv", "rgb255", "Luv", "Lab"),
+ dist = switch(cSpace, "hsv" = 0.10, "rgb255" = 30,
+ "Luv" = 15, "Lab" = 12))
+ {
+ if(is.character(rgb)) rgb <- col2rgb(rgb)
+ stopifnot(length(rgb <- as.vector(rgb)) == 3)
+ Rcol <- col2rgb(.cc <- colors())
+ uniqC <- !duplicated(t(Rcol)) # gray9 == grey9 (etc)
+ Rcol <- Rcol[, uniqC] ; .cc <- .cc[uniqC]
+ cSpace <- match.arg(cSpace)
+ convRGB2 <- function(Rgb, to)
+ t(convertColor(t(Rgb), from="sRGB", to=to, scale.in=255))
+ ## the transformation, rgb{0..255} --> cSpace :
+ TransF <- switch(cSpace,
+ "rgb255" = identity,
+ "hsv" = rgb2hsv,
+ "Luv" = function(RGB) convRGB2(RGB, "Luv"),
+ "Lab" = function(RGB) convRGB2(RGB, "Lab"))
+ d <- sqrt(colSums((TransF(Rcol) - as.vector(TransF(rgb)))^2))
+ iS <- sort.list(d[near <- d <= dist])# sorted: closest first
+ setNames(.cc[near][iS], format(zapsmall(d[near][iS]), digits=3))
+ }
> nearRcolor(col2rgb("tan2"), "rgb")
0.0 21.1 25.8 29.5
"tan2" "tan1" "sandybrown" "sienna1"
> nearRcolor(col2rgb("tan2"), "hsv")
0.0000 0.0410 0.0618 0.0638 0.0667 0.0766 0.0778
"tan2" "sienna2" "coral2" "tomato2" "tan1" "coral" "sienna1"
0.0900 0.0912 0.0918
"sandybrown" "coral1" "tomato"
> nearRcolor(col2rgb("tan2"), "Luv")
0.00 7.42 7.48 12.41 13.69
"tan2" "tan1" "sandybrown" "orange3" "orange2"
> nearRcolor(col2rgb("tan2"), "Lab")
0.00 5.56 8.08 11.31
"tan2" "tan1" "sandybrown" "peru"
> nearRcolor("#334455")
0.0867
"darkslategray"
> ## Now, consider choosing a color by looking in the
> ## neighborhood of one you know :
>
> plotCol(nearRcolor("deepskyblue", "rgb", dist=50))
> plotCol(nearRcolor("deepskyblue", dist=.1))
> plotCol(nearRcolor("tomato", "rgb", dist= 50), nrow=3)
> plotCol(nearRcolor("tomato", "hsv", dist=.12), nrow=3)
> plotCol(nearRcolor("tomato", "Luv", dist= 25), nrow=3)
> plotCol(nearRcolor("tomato", "Lab", dist= 18), nrow=3)
# Fix the RNG state so the random color draw below is reproducible.
set.seed(2)

# Draw one named color per column of the subset.
n_cols <- ncol(CMP_subset)
col_sample <- sample(colors(), size = n_cols)
col_sample
[1] "lightgray" "lavenderblush4" "grey12" "grey88" "gray51"
[6] "ivory4" "grey36"
# One boxplot per column, each filled with its sampled color.
boxplot(CMP_subset, col = col_sample)

# Compute the histogram of Yield without drawing it, so the breaks, counts and
# densities are printed instead. FALSE is spelled out because F is reassignable.
hist(CMP_subset$Yield, plot = FALSE)
$breaks
[1] 35 36 37 38 39 40 41 42 43 44 45 46 47
$counts
[1] 1 3 16 31 39 32 21 20 10 2 0 1
$density
[1] 0.005681818 0.017045455 0.090909091 0.176136364 0.221590909 0.181818182 0.119318182 0.113636364
[9] 0.056818182 0.011363636 0.000000000 0.005681818
$mids
[1] 35.5 36.5 37.5 38.5 39.5 40.5 41.5 42.5 43.5 44.5 45.5 46.5
$xname
[1] "CMP_subset$Yield"
$equidist
[1] TRUE
attr(,"class")
[1] "histogram"
# Histogram of Yield; the three colors are recycled across the bars.
hist(CMP_subset$Yield, col = col_sample[1:3], xlab = "Yield",
     main = "Dist. of Yield")

# Two histograms side by side. par() returns the previous settings, which are
# restored afterwards so the panel layout does not leak into later plots.
old_par <- par(mfrow = c(1, 2))
hist(CMP_subset$BiologicalMaterial01, col = col_sample[2],
     xlab = "Bio Material 1", main = "Dist. of Bio Material 1")
hist(CMP_subset$BiologicalMaterial02, col = col_sample[3],
     xlab = "Bio Material 2", main = "Dist. of Bio Material 2")
par(old_par)

# Three histograms side by side, again restoring the layout when done.
old_par <- par(mfrow = c(1, 3))
hist(CMP_subset$BiologicalMaterial01, col = col_sample[2],
     xlab = "Bio Material 1", main = "Dist. of Bio Material 1")
hist(CMP_subset$BiologicalMaterial02, col = col_sample[3],
     xlab = "Bio Material 2", main = "Dist. of Bio Material 2")
hist(CMP_subset$BiologicalMaterial03, col = col_sample[4],
     xlab = "Bio Material 3", main = "Dist. of Bio Material 3")
par(old_par)

# Scatterplot of the second column (x) against the first column (y).
plot(CMP_subset[, 2], CMP_subset[, 1], xlab = "Bio Material 1",
     ylab = "Yield", main = "Bio. Material 1 vs Yield",
     pch = 4, cex = 2, col = "steelblue")

# Scatterplot matrix of the first four columns.
pairs(CMP_subset[, 1:4], pch = 19, col = "steelblue")
# Install corrplot only when it is not already available, so re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot")
}
trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.6/corrplot_0.84.tgz'
Content type 'application/x-gzip' length 5452777 bytes (5.2 MB)
==================================================
downloaded 5.2 MB
The downloaded binary packages are in
/var/folders/pz/0fvcbrdd2hb75sqqgw0q7thw0000gn/T//RtmpOJSTTK/downloaded_packages
library(corrplot)
corrplot 0.84 loaded
# Show the package index for corrplot.
library(help = "corrplot")

# Correlation matrix of the first four columns, viewed and then plotted twice:
# once as pies, once in the mixed layout.
CMP_cor <- cor(CMP_subset[, 1:4])
View(CMP_cor)
corrplot(CMP_cor, method = "pie")
corrplot.mixed(CMP_cor)
# Install ggplot2 only when it is missing. Reinstalling a package that is
# already loaded in the session fails with "Updating loaded packages".
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
Error in install.packages : Updating loaded packages
library(ggplot2)
?ggplot2
# Build the Yield distribution plot layer by layer, printing it after each
# stage so the effect of every addition is visible.
ggp1 <- ggplot(CMP_subset, aes(x = Yield))
ggp1

# Histogram scaled to density so it shares a y axis with the density curve.
# NOTE(review): `..density..` is kept for compatibility with the ggplot2
# version in use; newer releases spell this `after_stat(density)` - confirm
# before upgrading.
ggp1 <- ggp1 +
  geom_histogram(aes(y = ..density..), binwidth = 0.75,
                 color = "steelblue", fill = "gray")
ggp1 <- ggp1 + geom_density(alpha = .5, color = "gray", fill = "steelblue")
ggp1

# Title and subtitle (title spelling corrected from "Distrobution").
ggp1 <- ggp1 + labs(title = "Distribution", subtitle = "Histogram & Density")
ggp1
# Scatterplot of Yield against BiologicalMaterial01, built up in stages and
# printed after each one.
ggp2 <- ggplot(CMP_subset, aes(x = BiologicalMaterial01, y = Yield))
ggp2

# First pass: default points.
ggp2 <- ggp2 + geom_point()
ggp2

# Second pass: orange points on top, a linear-model smoother, and labels.
ggp2 <- ggp2 +
  geom_point(color = "darkorange") +
  geom_smooth(method = lm) +
  labs(
    title = "Bio. Material 1 vs Yield",
    subtitle = "Scatterplot with linear fit"
  )
ggp2

# Reusable theme: black-and-white base with larger text sizes.
ggtheme1 <- theme_bw() +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 16),
    plot.title = element_text(size = 25),
    plot.subtitle = element_text(size = 18)
  )
ggp2 <- ggp2 + ggtheme1
ggp2